This document details KEGG pathways and the creation of the presence/absence plot.
Get KEGG orthologs for all regions in the pangenome. This is done in anvi'o by using the “summarize” option in the anvi'o interactive interface.
# Load the tab-delimited pangenome gene-cluster summary and keep only
# columns 3 through 18 for display.
# NOTE(review): the name `summary` masks base::summary() as a variable; it is
# kept here because the same global name is assigned elsewhere in this file.
summary <- read.delim(
  "../output/pangenome_gene_clusters_summary.txt"
)[, 3:18]
# Render the trimmed table (datatable() is presumably DT's interactive
# table widget -- confirm against the file's library() calls).
datatable(summary)
# Load the gene-cluster summary for the variable region (tab-delimited) and
# the table of KOs of interest. Both paths point into the author's home
# directory, so they must be adjusted on other machines.
genes <- read.csv("~/Downloads/sulf_genomes/sulfitobacter/SUMMARY_variable_region/sulfitobacter_gene_clusters_summary.txt", sep = "\t")
ko_of_interest <- read.csv("~/Downloads/sulf_genomes/KO_of_interest.csv")
# choose genes you want to plot
ko_of_interest <- subset(ko_of_interest, pathway == "terminal oxidase")
# Keep only usable KO identifiers. An NA would be pasted into the regex as the
# literal "NA", and an empty string would create an empty alternative that
# matches every row.
ko_ids <- as.character(ko_of_interest$ko)
ko_ids <- ko_ids[!is.na(ko_ids) & nzchar(ko_ids)]
# Fail early with a clear message instead of handing str_detect() an empty
# pattern when the pathway filter selects nothing.
if (length(ko_ids) == 0) {
  stop(
    "No KO identifiers found for the selected pathway in KO_of_interest.csv",
    call. = FALSE
  )
}
# Flag rows whose KOfam accession matches any of the selected KOs.
index <- str_detect(genes$KOfam_ACC, paste(ko_ids, collapse = "|"))
# which() drops NA matches, so rows with a missing accession are excluded.
genes_small <- genes[which(index), ]
# Bar chart of matching genes per KEGG module, coloured by bin, with one
# panel per genome; x-axis labels are rotated 90 degrees.
a <- ggplot(genes_small) +
  geom_bar(aes(KEGG_Module, fill = bin_name)) +
  theme_bw() +
  facet_wrap(~genome_name) +
  scale_x_discrete(guide = guide_axis(angle = 90))
a
# Build a genome-by-KO matrix from the pangenome gene-cluster summary and
# draw it as a heatmap.
summary <- read.csv("../output/pangenome_gene_clusters_summary.txt", sep = "\t")
summary <- summary[, c("genome_name", "KOfam_ACC")]
# Drop rows without a KOfam accession (subset() also discards NA rows).
summary <- subset(summary, KOfam_ACC != "")
# Cross-tabulate genomes (rows) against KOfam accessions (columns).
# NOTE(review): table() yields counts per genome/KO pair, not 0/1 values. If a
# strict presence/absence plot is intended, the matrix would need to be
# binarised -- confirm with the author.
test <- as.matrix(table(summary$genome_name, summary$KOfam_ACC))
# Use FALSE rather than F: F is an ordinary variable that can be reassigned.
# Column (KO) labels are hidden.
a <- pheatmap(test, show_colnames = FALSE)
a
# Shell command (not R): run anvi-compute-genome-similarity with the pyANI
# program on the genomes listed in sulf_external_genomes.txt, using 4 threads,
# writing to combined/ANI and passing the pangenome database via --pan-db.
anvi-compute-genome-similarity \
  --external-genomes sulf_external_genomes.txt \
  --program pyANI \
  --output-dir combined/ANI \
  --num-threads 4 \
  --pan-db combined/pangenome/pangenome-PAN.db